setwd("~/Dropbox/Interest Groups and Nominations/Replication files")

rm(list=ls(all=TRUE))
library(readstata13)
library(tidyverse)
library(readr)
library(readxl)
library(gridExtra)
library(xtable)
library(stargazer)
library(stringr)
library(gridExtra)

#GRAPH SETTINGS
title.size <- 20
nom.label.size <- 10
x.axis.tick.size <- 10
y.axis.tick.size <- 14
x.axis.label.size <- 18
y.axis.label.size <- 18

#note: story date does NOT have non-specific IG observations.
  #use tactics data to capture full range of IGs
#nominee-level data
nom.for.merge <- read.dta13("master_nominee_data_post_1930.dta", convert.underscore = TRUE)
nom.for.merge <- rename(nom.for.merge, year=year.announced)
nom.for.merge <- dplyr::select(nom.for.merge, nominee, confirmed, nom.name, 
                               date.vote, duration.confirmation, scandal, quality, 
                               segal.cover.score, date.announced, nsp1.new,year,dem.president)
nom.for.merge <- filter(nom.for.merge,nominee!="roberts_j_1")
nom.for.merge$nominee <- ifelse(nom.for.merge$nominee %in% c("roberts_j_1", "roberts_j_2"), "roberts_j", nom.for.merge$nominee)
nom.for.merge <- filter(nom.for.merge, nominee!="kavanaugh")

#use STORY data to measure overall mobilization
story.data <- read.dta13("combined_NYT_LAT_STORY_data.dta", convert.underscore=TRUE)
tactics.data <- read.dta13("combined_NYT_LAT_TACTICS_data.dta", convert.underscore=TRUE)


#code ERAs here

story.data$era <- factor(with(story.data, ifelse(date.announced < "1969-08-1", "1) 1930-1969",
                                                 ifelse( date.announced >= "1969-08-1" &  date.announced <= "1986-12-31", "2) 1969-1986",
                                                         ifelse(date.announced > "1986-12-31", "3) 1987-2017",NA)))))
tactics.data$era <- factor(with(tactics.data, ifelse(date.announced < "1969-08-1", "1) 1930-1969",
                                                 ifelse( date.announced >= "1969-08-1" &  date.announced <= "1986-12-31", "2) 1969-1986",
                                                         ifelse(date.announced > "1986-12-31", "3) 1987-2017",NA)))))


#####FIX NAMES HERE #######
########################################################################################################################
story.data$interest.group <- tools::toTitleCase (story.data$interest.group)
story.data$interest.group <- ifelse(story.data$interest.group=="American Federation of State County and Municipal Employees", "AFSCME", story.data$interest.group)
story.data$interest.group <- ifelse(story.data$interest.group %in% c("Afl-Cio","Aflcio"), "AFL-CIO", story.data$interest.group)
story.data$interest.group <- ifelse(nchar(story.data$interest.group) <=5, toupper(story.data$interest.group), story.data$interest.group)

#repear for tractics data
tactics.data$interest.group <- tools::toTitleCase (tactics.data$interest.group)
tactics.data$interest.group <- ifelse(tactics.data$interest.group=="American Federation of State County and Municipal Employees", "AFSCME", tactics.data$interest.group)
tactics.data$interest.group <- ifelse(tactics.data$interest.group %in% c("Afl-Cio","Aflcio"), "AFL-CIO", tactics.data$interest.group)
tactics.data$interest.group <- ifelse(nchar(tactics.data$interest.group) <=5, toupper(tactics.data$interest.group), tactics.data$interest.group)

#merge subset of vars onto IG data
foo <- dplyr::select(nom.for.merge, nominee, year, date.vote,dem.president)
story.data <- inner_join(story.data, foo, by="nominee")
length(unique(story.data$interest.group))##how many unique IGs? [should be 337]

# #build nominee-level data
#   #build string for nominee-day combo
# story.data$nominee.day <- paste(story.data$nominee, story.data$date.of.article)

nom.data <- as.data.frame(story.data  %>%  #calculate nom-level stats for IG participation
                            group_by(nominee) %>% 
                            summarise(
                              stories = length(unique(headline)), # # of unique stores per nominee
                              total.mentions = n(),#number of unique observations[groups-story] by nominee
                              total.groups = length(unique(interest.group[specific.ig==1])),# number of unique groups participating, for each nom
                              pro.groups = length(unique(interest.group[specific.ig==1 & for.nominee == 1])),#number of unique groups in favor of nom
                              neu.groups = length(unique(interest.group[specific.ig==1 & for.nominee == 0])),#number of unique groups who are neutral (or appear at least once as neutral)
                              neg.groups = length(unique(interest.group[specific.ig==1 & for.nominee == -1])),#number of unique groups against nom
                              pro.mentions = sum(for.nominee==1),#number of times groups are mentioned as supporting nominee
                              neu.mentions = sum(for.nominee==0),#number of times groups are mentioned as neutral
                              neg.mentions = sum(for.nominee==-1),#number of times groups are mentioned as opposing nominee
                              pro.stories = length(unique(headline[for.nominee==1])),
                              neu.stories = length(unique(headline[for.nominee==0])),
                              neg.stories = length(unique(headline[for.nominee==-1]))
                            ) )
foo <- dplyr::select(nom.for.merge, nominee)
nom.data <- full_join(nom.data, foo, by="nominee")
nom.data[is.na(nom.data)] <- 0#replace all missing with zero
nom.data <- full_join(nom.data, nom.for.merge, by="nominee")#now merge full nominee data
nom.data <- arrange(nom.data, date.announced)

#now merge in hearing data
hear.data <- read.dta13("hearing_data_individual_level_updated.dta", convert.underscore = TRUE, convert.factors=TRUE)
hear.data <- subset(hear.data, interest.group!="aba")
hear.data.agg <- subset(hear.data, hearings.written==1)  %>% #calculate aggregate participation rates
  group_by(nominee) %>% 
  summarise(  hear.total  = n())
hear.data.agg <- full_join(hear.data.agg, foo, by="nominee")
hear.data.agg[is.na(hear.data.agg)] <- 0#replace all missing with zero
nom.data <- full_join(nom.data, hear.data.agg, by="nominee")#now merge full nominee data
nom.data <- arrange(nom.data, date.announced)

#compare number of groups to CODY/HAL coding of Judiciaruy committe and compendium data (coding notes:
# • Interest group support is the number of groups presenting oral or written testimony for the nominee.
# • Data are available for those nominated since 1949.
# Data source: Calculated by Jeffrey A. Segal from http://www.gpoaccess.gov/congress/senate/judiciary/scourt.htm
# )
#individual level data
hear.data <- read.dta13("hearing_data_individual_level_updated.dta", convert.underscore = TRUE)
hear.data <- subset(hear.data, interest.group!="aba")
hear.data <- inner_join(hear.data,nom.data, by="nominee")

#take total number of written groups per nominee
hear.data.agg <- subset(hear.data, hearings.written==1)  %>% 
  group_by(nominee) %>% 
  summarise(  hear.total  = n())
foo <- dplyr::select(nom.data, nominee, total.groups, pro.groups, neg.groups)
foo <- inner_join(foo, hear.data.agg, by="nominee")
x.cor.with.gorsuch <- paste("corr. =",round(with(subset(foo, TRUE), cor(total.groups, hear.total)),2))
x.cor.without.gorsuch <- paste("corr. =",round(with(subset(foo, nominee!="gorsuch"), cor(total.groups, hear.total)),2))

p1 <- ggplot(foo, aes(total.groups, hear.total))  +
  geom_point() +   geom_smooth(method='lm', color="black") + 
  annotate("text", x =50, y = 0, label = x.cor.with.gorsuch, cex=7) +
  ggtitle("Comparing measures (with Gorsuch)")  + xlab("Newspapers") +  ylab("Judiciary Committee") +
  theme(plot.title = element_text(size =18, face = "bold", hjust=.5), 
        axis.text.x = element_text(size=x.axis.tick.size),
        axis.text.y = element_text(size=y.axis.tick.size),
        axis.title.x = element_text(size=y.axis.label.size),
        axis.title.y = element_text(size=y.axis.label.size))

p2 <- ggplot(subset(foo, nominee!="gorsuch"), aes(total.groups, hear.total))  +
  geom_point() + geom_smooth(method='lm', color="black") + 
  annotate("text", x =50, y = 10, label = x.cor.without.gorsuch, cex=7) +
  ggtitle("Comparing measures (without Gorsuch)") + xlab("Newspapers") +  ylab("Judiciary Committee") +
  theme(plot.title = element_text(size =18, face = "bold", hjust=.5), 
        axis.text.x = element_text(size=x.axis.tick.size),
        axis.text.y = element_text(size=y.axis.tick.size),
        axis.title.x = element_text(size=y.axis.label.size),
        axis.title.y = element_text(size=y.axis.label.size))

pdf("Figure_A1.pdf", height=6, width=12)
grid.arrange(p1,p2, ncol=2)
dev.off()

#now create database of unique interest group
ig.unique <- dplyr::select(story.data, interest.group, contains("group.label"),  contains("code"), 
                contains("ecology"), specific.ig, abort,labor,civ.rights,liberal, conservative, byear, dyear)
ig.unique$coder <- NULL
ig.unique <- filter(ig.unique, specific.ig==1)
ig.unique$specific.ig <- NULL
names(ig.unique)
ig.unique <- arrange(ig.unique[!duplicated(ig.unique), ], interest.group)#drop duplicates and sort by name

#now updated IG activities with respect to nominations
story.data.specific <- subset(story.data, specific.ig==1)#use just specific groups
first.year.ig <- story.data.specific %>%  #first year in which IG participated
  group_by(interest.group) %>% 
  summarise( first.year.part = min(year))

ig.nom.pairs <- subset(story.data.specific, select=c(nominee, interest.group))#calculate total participation for below//#make nominee-group pairs
ig.nom.pairs <- ig.nom.pairs[!duplicated(ig.nom.pairs), ]
total.par <- as.data.frame(table(ig.nom.pairs$interest.group))
names(total.par) <- c("interest.group", "total.noms")
#do same for hearing data
ig.nom.pairs <- subset(hear.data, select=c(nominee, interest.group))#calculate total participation for below//#make nominee-group pairs
ig.nom.pairs <- ig.nom.pairs[!duplicated(ig.nom.pairs), ]
total.par.hear <- as.data.frame(table(ig.nom.pairs$interest.group))
names(total.par.hear) <- c("interest.group", "total.noms.hear")

#now get nominee and date when first participated
foo <- dplyr::select(story.data.specific, interest.group, date.of.article, nominee)
foo <-  arrange( foo[!duplicated(foo), ], interest.group, date.of.article)
foo <- foo  %>% 
  group_by(interest.group) %>% 
  mutate( counter=row_number())
first.nominee <- foo$nominee[foo$counter==1]
igs <-  foo$interest.group[foo$counter==1]
ig.foo <- as.data.frame(cbind(igs, first.nominee))
names(ig.foo ) <- c("interest.group", "first.nominee")

#merge ALL onto ig.unique.data
ig.unique <- ig.unique %>%
  left_join(first.year.ig, by='interest.group') %>%
  left_join(total.par, by='interest.group')  %>%
  left_join(ig.foo, by='interest.group') 

#merge date_announced
foo <- dplyr::select(story.data.specific, nominee, date.announced)
foo <- foo[!duplicated(foo), ]
foo <- rename(foo, first.nominee=nominee)
foo <- rename(foo, first.date.mobil=date.announced)
ig.unique <- inner_join(ig.unique, foo)

#######
#stats for text--intro of data
dim(ig.unique)#how many unique groups?
length( table(story.data$nominee) )#how many unique nominees
length( unique(story.data$nominee) )
length (unique (paste (story.data$date.of.article, story.data$headline) ))#how many unique articles

################################################################################
################################################################################
##1st: levels of mobilization
################################################################################
################################################################################
#FIGURE 1 
#plot unique groups over time
nom.data$x <- c(1:nrow(nom.data))
nom.data$y <- nom.data$total.groups
max.y <- max(nom.data$y)

#add different color for failed nomines
p1 <- ggplot(data = nom.data)  +
  geom_line(aes(x,y)) +
  geom_point(data=subset(nom.data, confirmed==0), aes(x,y), shape=19, size=3) +
  geom_point(data=subset(nom.data, confirmed==1), aes(x,y), shape=1, size=3) +
  ggtitle("A) Interest group mobilization over time")  +  ylab("Number of groups") + 
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size),  axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),   axis.title.y = element_text(size=y.axis.label.size))  +
  scale_x_discrete(limits = nom.data$x,labels=  nom.data$nom.name) + scale_y_continuous(breaks=seq(0,90,10),expand = c(0, 1)) +#expand lowers zero on y-axis
  coord_cartesian(ylim = c(0,max.y)) +
  geom_vline(xintercept = nom.data$x[nom.data$nominee=="burger"], lty=2) +
  geom_vline(xintercept = nom.data$x[nom.data$nominee=="bork"], lty=2) +
  annotate("text", x = 28, y = 60, label = "1969",cex=5.2,hjust=1, srt=90) +
  annotate("text", x = 38, y = 60, label = "1987",cex=5.2,hjust=1, srt=90) 

# number of groups in support and opposition to each nominee
y.height <- 57
p2 <- ggplot(nom.data, aes(x=x))  +
  geom_line(aes(y=pro.groups),lty=1) + geom_line(aes(y=neg.groups),lty=2) +
  ggtitle("B) Interest group mobilization, support and opposition")  +
  ylab("Number of groups") + 
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size), axis.title.x=element_blank(),   
        axis.text.y = element_text(size=y.axis.tick.size),  axis.title.y = element_text(size=y.axis.label.size))  +
  scale_x_discrete(limits = nom.data$x,labels=  nom.data$nom.name) +  coord_cartesian(ylim = c(0,y.height )) +
  scale_y_continuous(breaks=seq(0,50 ,10),expand = c(0, 1)) +#expand lowers zero on y-axis
  annotate("text", x = 38.5, y = 40, label = "Groups in opposition",cex=5.2,hjust=1) +
  annotate("text", x = 48, y = 30, label = "Groups in support",cex=5.2,hjust=0,angle=90) 

#combined 
pdf("Figure_1.pdf", height=10, width=9,  onefile = TRUE)
grid.arrange(p1,p2)
dev.off()


#STATs FOR TEXT
#add number of unique groups
#what's mean number of groups before burger?
ok <- with(nom.data, year<=1969 & nominee!="haynsworth")
with(nom.data, mean(total.groups[ok]))
#how many noms with zero 
with(nom.data, table(total.groups[ok]==0))

#what's mean number of groups in middle period
ok <- with(nom.data, year>=1969 & year<=1986& nominee!="haynsworth")
with(nom.data, mean(total.groups[ok]))

#what's mean number of groups in middle period
ok <- with(nom.data, year>=1987)
with(nom.data, mean(total.groups[ok]))
#excluding Bork
ok <- with(nom.data, year>=1987 & nominee!="bork")
with(nom.data, mean(total.groups[ok]))

################################################################################################
#now do number of "mentions"
################################################################################################
nom.data$x <- c(1:nrow(nom.data))
nom.data$y <- nom.data$total.mentions
max.y <- max(nom.data$y)+3

#how many mentions by people for the american way for bork
table(story.data$nominee == "bork" &  story.data$interest.group=="People for the American Way")

p1 <- ggplot(data = nom.data)  +
  geom_point(data=subset(nom.data, confirmed==0), aes(x,y), shape=19, size=3) +
  geom_point(data=subset(nom.data, confirmed==1), aes(x,y), shape=1) +
  geom_line(aes(x,y)) + 
  ggtitle("Interest group mobilization over time")  +
  ylab("Number of mentions") + 
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size),
        axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),
        axis.title.y = element_text(size=y.axis.label.size))  +
  scale_x_discrete(limits = nom.data$x,labels=  nom.data$nom.name) +
  coord_cartesian(ylim = c(0,max.y)) +
  scale_y_continuous(breaks=seq(0,300,20),expand = c(0, 1)) +#expand lowers zero on y-axis
  geom_vline(xintercept = nom.data$x[nom.data$nominee=="burger"], lty=2) +
  geom_vline(xintercept = nom.data$x[nom.data$nominee=="bork"], lty=2) +
  annotate("text", x = 28, y = 155, label = "1969",cex=5.2,hjust=1, srt=90) +
  annotate("text", x = 37.5, y = 155, label = "1987",cex=5.2,hjust=1, srt=90) 

#add different color for failed nomines
p2 <- ggplot(data=nom.data,aes(x,y=neg.mentions))  +
  geom_line(aes(x,y=pro.mentions),lty=1) +  geom_line(aes(x,y=neg.mentions),lty=2) +
  ggtitle("Interest group mobilization over time, support and opposition")  +
  ylab("Number of mentions") + coord_cartesian(ylim = c(0,160)) +# theme_bw() +
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size),axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),  axis.title.y = element_text(size=y.axis.label.size))  +
  coord_cartesian(ylim = c(0,max.y)) +  scale_x_discrete(limits = nom.data$x,labels=  nom.data$nom.name) +
  scale_y_continuous(breaks=seq(0,300,20),expand = c(0, 1)) +#expand lowers zero on y-axis
  annotate("text", x = 35.5, y = 95, label = "Groups in opposition",cex=5.2,hjust=1) +
  annotate("text", x = 52, y = 42, label = "Groups in support",cex=5.2,hjust=0,angle=90) 

pdf("Figure_A4.pdf", height=10, width=9)
grid.arrange(p1,p2)
dev.off()

####################################################################################
#PARTICIPATION BY GROUPS
####################################################################################
############
#dotplot of total nominations participated in, among most active groups
foo <- filter(dplyr::select(ig.unique, interest.group, total.noms), total.noms >=3)
foo1 <- filter(dplyr::select(ig.unique, interest.group, total.noms), total.noms >=1)
foo1 <- inner_join(story.data, foo1)

#####among the major groups--which nominations did they participate in?
ok <- c("NARAL","People for the American Way", "AFL-CIO", "NOW", "National Right to Life Committee")
main.igs <- filter(story.data.specific, interest.group %in% ok)
table(main.igs$interest.group, main.igs$nominee)
#####compare number of noms participated in across LAT and hearing data
xx <- data.frame( table(ig.unique$total.noms) )
p <- ggplot(xx, aes(Var1, Freq)) +  geom_bar(stat='identity', width=.50) +
  ggtitle("Distribution of number of nominations participated in")  +
  xlab("Number of nominations participated in") +  ylab("Number of groups") +
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle =0,size=12),  axis.title.x= element_text(size=x.axis.label.size), 
        axis.text.y = element_text(size=12), axis.title.y = element_text(size=y.axis.label.size)) 

pdf("Figure_2.pdf", height=6, width=8)
p
dev.off()

#now do same for hearing data
xx <- data.frame(table( total.par.hear$total.noms.hear ))
p <- ggplot(xx, aes(Var1, Freq)) +
  geom_bar(stat='identity', width=.50) +
  ggtitle("Distribution of number of nominations participated in")  +
  xlab("Number of nominations participated in") +  ylab("Number of groups") +
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle =0,size=12),
        axis.title.x= element_text(size=x.axis.label.size), 
        axis.text.y = element_text(size=12),
        axis.title.y = element_text(size=y.axis.label.size)) 

pdf("Figure_A6.pdf", height=6, width=8)
p
dev.off()

#stats for text
x <- table(total.par.hear$interest.group)
length(x) #587 groups 
table(x==1)
mean(x==1)#-- 80% are one-shots 

############################################################################################################
############################################################################################################
#ECOLOGY ANALYSIS
############################################################################################################
############################################################################################################
#list of most active group
#table groups by nominee [Table 2 in draft]
with(story.data, sort(table(interest.group[specific.ig==1])), decreasing==TRUE)
#identify 10 most frequent groups for 1) Hughes to Burger 2) Haynsworth to Scalia #3) Bork to Gorsuch
story.data <- arrange(story.data, date.announced)

#had been using mobilization--better to use nominations
foo <- subset(story.data, specific.ig==1, select=c(interest.group, nominee, era))
foo <- foo[!duplicated(foo), ]
foo$interest.group <- dplyr::recode(foo$interest.group, "Leadership Conference on Civil Rights"="LCCR")

####new: add column with all groups
foo2 <- subset(foo, TRUE)
group0 <- with(foo2, table(interest.group))
group0 <- sort(group0, decreasing=TRUE)[1:10]
names0 <- paste0(tools::toTitleCase(names(group0)), " (",group0,")")

#1) Hughes to Burger
foo2 <- subset(foo, era == levels(foo$era)[1])
group1 <- with(foo2, table(interest.group))
group1 <- sort(group1, decreasing=TRUE)[1:10]
names1 <- paste0(tools::toTitleCase(names(group1)), " (",group1,")")

#2) Haynsworth to Scalia 
foo2 <- subset(foo, era == levels(story.data$era )[2])
group2 <- with(foo2, table(interest.group))
group2 <- sort(group2, decreasing=TRUE)[1:10]
names2 <-  paste0(tools::toTitleCase(names(group2)), " (",group2,")")

#3) Bork to Gorsuch
foo2 <- subset(foo, era == levels(story.data$era)[3])
group3 <- with(foo2, table(interest.group))
group3 <- sort(group3, decreasing=TRUE)[1:10]
names3 <- paste0(tools::toTitleCase(names(group3)), " (",group3,")")

#make table
foo <- ""
x <- data.frame(paste(foo, names0), paste(foo, names1),paste(foo, names2), paste(foo, names3))
names(x) <- c("All", "Hughes to Burger", "Haynsworth to Scalia", "Bork to Gorsuch")
xtable(x)
print(xtable(x), include.rownames=FALSE)

#make vertical table
foo.all <- rbind("All", data.frame(names0))
foo.all$names0 <- as.character(foo.all$names0)
foo.all$names0[is.na(foo.all$names0)] <- "All"

foo.1 <- rbind("All", data.frame(names1))
foo.1$names1 <- as.character(foo.1$names1)
foo.1$names1[is.na(foo.1$names1)] <- "Hughes to Burger"
names(foo.1) <- "names0"

foo.2 <- rbind("All", data.frame(names2))
foo.2$names2 <- as.character(foo.2$names2)
foo.2$names2[is.na(foo.2$names2)] <- "Haynsworth to Scalia"
names(foo.2) <- "names0"

foo.3 <- rbind("All", data.frame(names3))
foo.3$names3 <- as.character(foo.3$names3)
foo.3$names3[is.na(foo.3$names3)] <- "Bork to Gorsuch"
names(foo.3) <- "names0"

x <- rbind(foo.all, foo.1, foo.2, foo.3)
xtable(x)
print(xtable(x), include.rownames=FALSE)


######################################################
#Figure 3: labor/civil rights mobilization over time
######################################################
#have to take into account multiple categorizations for any nominee
#reshape to "double-count"
#group.label1, group.label2, group.label3
foo <- filter(story.data, specific.ig==1)
#Was using mentions--but better to use unique groups
foo$temp <- paste(foo$nominee, foo$interest.group)
foo <- foo[!duplicated(foo$temp), ]
foo <- dplyr::select(foo, nominee, group.code1, group.code2, group.code3)
foo <- arrange(foo, nominee)

#make counter within group
foo2 <-foo %>%
  gather(`group.code1`, `group.code2`, `group.code3`,#columns that represent values, not variables
         key="group", #this is the 'key' -- the name of the var whose values form the column names
         value="code", #the name variable whose values are spread over the cells
         factor_key=TRUE) #key values stored as a factor
foo2 <- subset(foo2, !is.na(code))

foo2$labor <- ifelse(foo2$code %in% c(1),1,0)
foo2$abort <- ifelse(foo2$code %in% c(9,10),1,0)
foo2$civ.rights <- ifelse(foo2$code %in% c(5),1,0)
foo2$liberal <- ifelse(foo2$code %in% c(12, 25),1,0)
foo2$conservative <- ifelse(foo2$code %in% c(13,26),1,0)

#now aggregate to nom level
comp.data <- foo2 %>% 
  group_by(nominee) %>% 
  summarise(
    Labor = sum(labor), 
    CivilRights = sum(civ.rights),
    Abortion = sum(abort),
    Liberal = sum(liberal),
    Conservative = sum(conservative)
  )

#merge other nominees onto data (to make them zero when missing)
foo3 <- dplyr::select(nom.data, nominee)
comp.data <- full_join(comp.data, foo3)
comp.data[is.na(comp.data)] <- 0
#now reshape
comp.data <- gather(comp.data, type, mobil, -nominee)
comp.data$type<-ifelse(comp.data$type=="CivilRights", "Civil rights",comp.data$type)

#merge again to get nom name and date
foo4 <- dplyr::select(nom.data, nominee, date.announced,nom.name)
comp.data <- full_join(comp.data, foo4)
comp.data <- arrange(comp.data, desc(type),date.announced)
comp.data$nominee <- NULL
comp.data$x <- rep(c(1:nrow(nom.data)), length(unique(comp.data$type)))#for x-axis

#drop lib/con for now
comp.data.1 <- subset(comp.data, type!="Liberal" & type!= "Conservative")
#reorder factors
comp.data.1$type <- factor(comp.data.1$type, levels = c("Labor", "Civil rights", "Abortion"))
x.axis.tick.size <- 10

p1 <- ggplot(comp.data.1, aes(x, mobil)) +
  geom_smooth(method = "loess", size = 1.5, fill=NA, span=.5, color="black") + facet_grid(type~.) +
  geom_point() +   ylab("Levels of mobilization") +
  ggtitle("A) Abortion/Civil Rights/Labor\nmobilization over time")  +
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5,margin = margin(t = 10, b = -40)), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size),
        axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),
        axis.title.y = element_text(size=y.axis.label.size))  +
  scale_x_discrete(limits = comp.data.1$x,labels=comp.data.1$nom.name)


#now do liberal versus conservative 
comp.data.1 <- subset(comp.data, type=="Liberal" | type== "Conservative")
#reorder factors
comp.data.1$type <- factor(comp.data.1$type, levels = c("Liberal", "Conservative"))

p2 <- ggplot(comp.data.1, aes(x, mobil)) +
  geom_smooth(method = "loess", size = 1.5, fill=NA, span=.5, color="black") + facet_grid(type~.) +
  geom_point() +   ylab("Levels of mobilization")+
  ggtitle("B) Liberal versus conservative\nmobilization over time")  +
  ylim(0,max(comp.data$mobil)) +  
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5,
                                  margin = margin(t = 10, b = -50)),
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size),
        axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),
        axis.title.y = element_text(size=y.axis.label.size))  +  
  scale_x_discrete(limits = comp.data$x,labels=comp.data$nom.name) 

#now stack in single figure
lay <-  cbind(c(5,3))
pdf("Figure_3.pdf", height=12, width=10)
grid.arrange(p1,p2,  ncol=1, widths = 1:1,   heights=unit(c(7,5), c("in", "in")))
dev.off()


##############################################################################################################
#Now do broad cateogorization of  interest group ecology over time
##############################################################################################################
#have to take into account multiple categorizations for any nominee
#reshape to "double-count"
#group.label1, group.label2, group.label3
ecol.data <- subset(story.data, specific.ig==1)
ecol.data <- dplyr::select(ecol.data, nominee, interest.group, ecology1, ecology2, ecology3, era )
ecol.data <- arrange(ecol.data, nominee)

#make counter within group
ecol.data.reshaped <-ecol.data %>%
  gather(`ecology1`, `ecology2`, `ecology3`,#columns that represent values, not variables
         key="group", #this is the 'key' -- the name of the var whose values form the column names
         value="code", #the name variable whose values are spread over the cells
         factor_key=TRUE) #key values stored as a factor
ecol.data.reshaped <- subset(ecol.data.reshaped, code!="")
ecol.data.reshaped$code <- factor(ecol.data.reshaped$code)

#now aggregate to nom level
comp.data <- ecol.data.reshaped %>% 
  group_by(era, code) %>% 
  summarise(  count=n()) %>%
  complete (era, code)#add observations for combinations with no data
comp.data <- arrange(comp.data,   code, era)
comp.data
comp.data <- comp.data[!duplicated(comp.data),]#delete duplicate rows
comp.data$count[is.na(comp.data$count)] <- 0 #turn missing to zero
comp.data

#now add total counts by era
comp.data.foo <- ecol.data.reshaped %>% 
  group_by(era) %>% 
  summarise(  total.in.era=n() )

comp.data <- full_join(comp.data, comp.data.foo)
#now normalize by total
comp.data$count.normal <- 100*comp.data$count / comp.data$total.in.era
comp.data <- arrange(comp.data,  era, code)
comp.data$x <- rep(c(1:3),each=n_distinct(comp.data$code))

#order categories by increasing aggregate participation
comp.data <- comp.data %>% 
  group_by(code) %>%
  mutate(overall.count = sum(count))
comp.data <- arrange(comp.data, overall.count)
var_width <- 15 

comp.data <- mutate(comp.data, pretty_code = as.factor(str_wrap(code, width = var_width))) #to have facet labels on two lines
comp.data$pretty_code <- reorder(comp.data$pretty_code, comp.data$overall.count)

p1 <- ggplot(comp.data, aes(x, count.normal)) +
  facet_grid(.~pretty_code) +
  geom_line()  +    geom_point() +
  #ggtitle("Interest group ecology over time")  +
  ylab("Percentage of mobilization, by era") + 
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size), axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),axis.title.y = element_text(size=y.axis.label.size),
        strip.text.x = element_text(size = 12, angle = 0)) +
  scale_x_discrete(limits=c(1:3), labels= substring( unique(comp.data$era) , 4, nchar(as.character(unique(comp.data$era)))))

p2 <- ggplot(comp.data, aes(x, count)) +
  facet_grid(.~pretty_code) +
  geom_line()  +    geom_point() +
  #  ggtitle("Interest group ecology over time")  +
  ylab("Total mobilization, by era") + 
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size),
        axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),
        axis.title.y = element_text(size=y.axis.label.size),
        strip.text.x = element_text(size = 12, angle = 0)) +
  scale_x_discrete(limits=c(1:3),  labels= substring( unique(comp.data$era) , 4, nchar(as.character(unique(comp.data$era)))))

#combined
pdf("Figure_4.pdf", height=10, width=9)
grid.arrange(p2,p1)
dev.off()

#for text:
#For example, in the 1930-1969 period, the total mobilization among occupational groups was 
with(comp.data, table(count[code=="Occupational groups" & era=="1) 1930-1969"]))

#when did chamber of commerce first mobil
table(ig.unique$first.nominee[ig.unique$interest.group=="Chamber of Commerce"])
table(story.data$headline[story.data$interest.group=="Chamber of Commerce" & story.data$nominee=="thomas"])
##################################################
#ANALYSIS OF TACTICS
##################################################
  #different from old script-data is already reshaped long
#By era, calculate each type of advocacy
tactics.sum <- tactics.data  %>% 
  group_by(era, tactic) %>% 
  summarise( count = n())%>%
  complete (tactic, era )
tactics.sum <- tactics.sum[!duplicated(tactics.sum),]#delete duplicate rows
tactics.sum$count[is.na(tactics.sum$count)] <- 0 #turn missing to zero

tac.taxonomy <- read.csv("tactics_taxonomy.csv")
tac.taxonomy <- filter(tac.taxonomy, tactic!="")
tac.taxonomy$class <- NULL

#merge advocacy categories
tactics.sum <- inner_join(tactics.sum, tac.taxonomy)
head(tactics.sum)

#now add total counts by era and merge
foo <- tactics.data %>% 
  group_by(era) %>% 
  summarise(  total.in.era=n() )

tactics.sum <- inner_join(tactics.sum, foo)
tactics.sum$percent.activity <- round(tactics.sum$count/tactics.sum$total.in.era*100)

#############################################################################################################################################################
#now do so same  type of advocacy over time
##############################################################################################################################################################
advocacy.data <- dplyr::select(tactics.data, era,interest.group,specific.ig, type.of.advocacy)
head(advocacy.data)

#Statistics for text
#what is most common tactic in early period
with(tactics.data, sort(prop.table(table(tactic[era=="1) 1930-1969"]))))
with(tactics.data, sort(prop.table(table(tactic[era=="2) 1969-1986"]))))
with(tactics.data, sort(prop.table(table(tactic[era=="3) 1987-2017"]))))
with(advocacy.data, sort(prop.table(table(type.of.advocacy[era=="1) 1930-1969"]))))
with(advocacy.data, sort(prop.table(table(type.of.advocacy[era=="2) 1969-1986"]))))
with(advocacy.data, sort(prop.table(table(type.of.advocacy[era=="3) 1987-2017"]))))

#By era, calculate each type of advocacy
advocacy.sum <- advocacy.data  %>% 
  group_by(era, type.of.advocacy) %>% 
  summarise( count = n())%>%
  complete (type.of.advocacy, era )
advocacy.sum <- advocacy.sum[!duplicated(advocacy.sum),]#delete duplicate rows
advocacy.sum$count[is.na(advocacy.sum$count)] <- 0 #turn missing to zero

#now add total counts by era and merge
foo <- advocacy.data %>% 
  group_by(era) %>% 
  summarise(  total.in.era=n() )

advocacy.sum <- inner_join(advocacy.sum, foo)
advocacy.sum$percent.activity <- round(advocacy.sum$count/advocacy.sum$total.in.era*100)
advocacy.sum$count.normal <- 100* (advocacy.sum$count / advocacy.sum$total.in.era ) 
advocacy.sum <- arrange(advocacy.sum,  era, type.of.advocacy)
advocacy.sum$x <- rep(c(1:3),each=n_distinct(advocacy.sum$type.of.advocacy))

#order categories by increasing aggregate participation
advocacy.sum <- advocacy.sum %>% 
  group_by(type.of.advocacy) %>%
  mutate(overall.count = sum(count))
advocacy.sum <- arrange(advocacy.sum, overall.count)
advocacy.sum$type.of.advocacy <- factor( tools::toTitleCase (as.character(advocacy.sum$type.of.advocacy)))
advocacy.sum$type.of.advocacy <- factor(advocacy.sum$type.of.advocacy, levels=c("Inside", "Outside", "Grassroots", "Other"))

##RAW COUNTS
strip.size <- 14

p1 <- ggplot(advocacy.sum, aes(x, count)) +
  # facet_grid(.~reorder(advocacy, overall.count)) +
  facet_grid(.~type.of.advocacy) +
  geom_line()  +    geom_point() +
  # ggtitle("Types of advocacy over time")  +
  ylab("Counts of types of advocacy, by era") + 
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size),
        axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),
        axis.title.y = element_text(size=y.axis.label.size),
        strip.text.x = element_text(size = strip.size, angle = 0)
  ) +
  scale_x_discrete(limits=c(1:3), labels= unique(advocacy.sum$era)) 

#Normalizing by overall activity in each period
p2 <- ggplot(advocacy.sum, aes(x, count.normal)) +
  # facet_grid(.~reorder(advocacy, overall.count)) +
  facet_grid(.~type.of.advocacy) +
  geom_line()  +    geom_point() + 
  # ggtitle("Types of advocacy over time")  +
  ylab("Percentage of type of advocacy, by era") + 
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size),
        axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),
        axis.title.y = element_text(size=y.axis.label.size),
        strip.text.x = element_text(size = strip.size, angle = 0)
  ) +
  scale_x_discrete(limits=c(1:3), labels= unique(advocacy.sum$era)) + 
  scale_y_continuous(breaks = seq(0,60, by = 10), limits=c(0,65))


#combined for paper
pdf("Figure_5.pdf", height=10, width=9,  onefile = TRUE)
grid.arrange(p1,p2)
dev.off()

#stats for text
#what was most common inside lobbying tactic in early period?
sort( with(filter(tactics.data, era=="1) 1930-1969"), table(tactic)))
with(filter(tactics.data, era=="1) 1930-1969"), mean(tactic %in% 
      c("Testify (or provide affidavit, submit written testimony, or accompany witness)", "Send letter/fax to member of Congress or staff")))
#how much outside lobbying?
sort( with(filter(tactics.data, era=="1) 1930-1969"), table(type.of.advocacy)))
sort( with(filter(tactics.data, era=="1) 1930-1969"), mean(type.of.advocacy=="outside")))
#how much contact with the press
with(filter(tactics.data, era=="1) 1930-1969"), mean(tactic %in% 
  c("Press conference/Press release/statement to press or journalist (quoted in article)")))

#MIDDLE PERIOD
sort( with(filter(tactics.data, era=="2) 1969-1986"), table(type.of.advocacy)))
sort( with(filter(tactics.data, era=="2) 1969-1986"), mean(type.of.advocacy=="outside")))
sort( with(filter(tactics.data, era=="2) 1969-1986"), mean(type.of.advocacy=="grassroots")))

sort( with(filter(tactics.data, era=="2) 1969-1986"), table(tactic)))
sort( with(filter(tactics.data, era=="2) 1969-1986"), mean(tactic=="Press conference/Press release/statement to press or journalist (quoted in article)")))
sort( with(filter(tactics.data, era=="2) 1969-1986"), mean(tactic=="Testify (or provide affidavit, submit written testimony, or accompany witness)")))

#LATE PERIOD
sort( with(filter(tactics.data, era=="3) 1987-2017"), table(type.of.advocacy)))
sort( with(filter(tactics.data, era=="3) 1987-2017"), mean(type.of.advocacy=="outside")))
sort( with(filter(tactics.data, era=="3) 1987-2017"), mean(type.of.advocacy=="grassroots")))
sort( with(filter(tactics.data, era=="3) 1987-2017"), mean(type.of.advocacy=="inside")))

sort( with(filter(tactics.data, era=="3) 1987-2017"), table(tactic)))
sort( with(filter(tactics.data, era=="3) 1987-2017"), mean(tactic=="Press conference/Press release/statement to press or journalist (quoted in article)")))
sort( with(filter(tactics.data, era=="3) 1987-2017"), mean(tactic=="Testify (or provide affidavit, submit written testimony, or accompany witness)")))


#now do crosstab of type of group by type of advocacy
#need to merge ecology onto 
ecol <- dplyr::select(ecol.data.reshaped, interest.group, code)
ecol <- ecol[!duplicated(ecol), ]
ecol <- rename(ecol, type=code )

advocacy.data <-  filter(advocacy.data, specific.ig==1)

dim(advocacy.data)
foo2 <- inner_join(advocacy.data , ecol)
#drop others
foo2 <- filter(foo2, foo2$type!="Other" & foo2$type.of.advocacy !="other")
dim(foo2)
#write.dta(foo2, "temp.dta")
foo2$type.of.advocacy <- factor(foo2$type.of.advocacy) 
t <- table(droplevels(foo2$type), droplevels(foo2$type.of.advocacy))
prop.table(t)

#plot proportion of each advocacy by type of organization
#JUST KEEP MAIN GROUPS
foo3 <- filter(foo2, type %in% c("Identity groups", "Occupational groups", "Public interest (citizen) groups"))
foo3$advocacy <- as.factor(tools::toTitleCase(as.character(foo3$type.of.advocacy)))
foo3$advocacy <- relevel (foo3$type.of.advocacy, ref="inside")
#upper case
levels(foo3$advocacy) <- c("Inside","Grassroots","Outside")

p <- ggplot(foo3) + aes(x=advocacy, fill=advocacy) +
  geom_bar(width=.1) +facet_wrap(type~era,  scales = "free") + scale_fill_grey() + theme_bw() + 
  xlab("Type of advocacy") +  ylab("Count of tactics")  +
  ggtitle("Changing tactics over time, by group type")  +
  theme(plot.title = element_text(size =title.size, face = "bold",  hjust=.5), 
        axis.text.x = element_text(size=10),
        axis.text.y = element_text(size=10),
        axis.title.y = element_text(size=y.axis.label.size),
        axis.title.x = element_text(size=y.axis.label.size)) +
  theme(legend.position="none")

pdf("Figure_6.pdf", height=8, width=9)
p
dev.off()


##############################################################################################################
##############################################################################################################
#TIMING OF MOBILIZATION
#reorder factor 
tactics.data$timingf <-  factor(tactics.data$timing, levels=c("pre_hearing", "during_hearing", "post_hearing"))
table(tactics.data$timing, useNA="always")
#foo <- arrange( dplyr::select(tactics.data, nominee, effective.date, date.hearing.start, date.hearing.end, date.vote, timing), effective.date)
#View(subset(foo, nominee=="black"))
timing.data <- tactics.data  %>% 
  group_by(nominee, timingf) %>% 
  summarise(total.mentions=n())
timing.data <-  timing.data %>%  #convert to wide [nominee level]
  spread(key=timingf, #
         value=total.mentions)

#merge other nominees onto data (to make them zero when missing)
foo3 <- dplyr::select(nom.data, nominee,date.announced,nom.name )
timing.data <- full_join(timing.data, foo3)
#convert missing to zero
timing.data$during_hearing[is.na(timing.data$during_hearing)] <- 0
timing.data$post_hearing[is.na(timing.data$post_hearing)] <- 0
timing.data$pre_hearing[is.na(timing.data$pre_hearing)] <- 0

#now reshape back to long
timing.data <- ungroup( timing.data %>% #ungroup so I can use "arrange" below properly
                          gather(`during_hearing`, `post_hearing` ,`pre_hearing` , 
                                 key="timingf", value="total.mentions"))
#merge again to get nom name and date
timing.data <- arrange(timing.data, timingf, date.announced)
timing.data$nominee <- NULL
timing.data$x <- rep(c(1:nrow(nom.data)), length(unique(timing.data$timingf)))#for x-axis
timing.data$foo <- rep(c(2,3,1), each=nrow(nom.data))#for ordering panels in plot


#now do stacked barplot--by nominee#reorder factor 
timing.data$timingf <-  factor(timing.data$timingf, levels=c("pre_hearing", "during_hearing", "post_hearing"))
timing.data$Timing <- timing.data$timingf
timing.data$Timing <- dplyr::recode(timing.data$Timing, pre_hearing="Pre hearing",  during_hearing="During hearing", post_hearing="Post hearing")

p1 <- ggplot(timing.data, aes(x, y= total.mentions,fill=Timing)) +
  geom_bar(stat = "identity") +
  ylab("Number of mentions")  +
  scale_colour_grey() + scale_fill_grey(start = 0, end = .7)  +   theme_bw() +
  scale_x_discrete(limits = timing.data$x,labels=timing.data$nom.name) +
  theme(plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
        axis.text.x = element_text(angle = 45, hjust = 1,size=x.axis.tick.size),
        axis.title.x=element_blank(), 
        axis.text.y = element_text(size=y.axis.tick.size),
        axis.title.y = element_text(size=y.axis.label.size),
        legend.position = c(0.1, 0.7))

#grouping by era.
era.data <- dplyr::select(story.data, nominee,era)
era.data <- era.data[!duplicated(era.data),]#delete duplicate rows
tactics.data <- left_join(tactics.data, era.data)

timing.data.era <- tactics.data  %>% 
  group_by(era, timingf) %>% 
  summarise(total.mentions=n())
timing.data.era <- timing.data.era   %>% 
  group_by(era) %>% 
  mutate(era.counts=sum(total.mentions), total.mentions.prop  =total.mentions/era.counts)

p2 <- ggplot(timing.data.era, aes(x=era, y= total.mentions,fill=timingf)) +
  geom_bar(stat = "identity",position="dodge") +
  ylab("Number of mentions")  +
  ylim(0,max(timing.data.era$total.mentions)) +  
  annotate("text", x =.7, y = 270, label = "Pre hearings", cex=5, angle=90) +
  annotate("text", x =1, y = 300, label = "During hearings", cex=5, angle=90) +
  annotate("text", x =1.3, y = 260, label = "Post hearings", cex=5, angle=90) +
  scale_colour_grey() + scale_fill_grey(start = 0, end = .9)  +   theme_bw() +
  theme( plot.title = element_text(size =title.size, face = "bold", hjust=.5), 
         axis.text.x = element_text(angle = 0, size=14),
         axis.title.x=element_blank(), 
         axis.text.y = element_text(size=14),
         axis.title.y = element_text(size=y.axis.label.size),
         panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
         legend.position = "none")

#combined for paper
pdf("Figure_7.pdf", height=10, width=12,  onefile = TRUE)
grid.arrange(p1,p2)
dev.off()

#stats for text
 #how much mobilization before hearing in late period
sort( with(filter(tactics.data, era=="3) 1987-2017"), mean(timing=="pre_hearing")))
  #compared to first two periods
sort( with(filter(tactics.data, era!="3) 1987-2017"), mean(timing=="pre_hearing")))


#AMICUS data
ami.data <- read_csv("amicus_counts_for_IG_paper.csv")
ami.data <- rename(ami.data,amicus=total)
ami.data$source <- NULL
#assume 2016/17 the same as 2014 for now
foo <- data.frame(year=c(2015:2017), amicus=rep(ami.data$amicus[ami.data$year==2014], 3))
ami.data <- rbind(ami.data, foo)

p <- ggplot(ami.data, aes(year, amicus)) + 
  geom_line() + 
  ylab("Amicus briefs per year") + xlab("") +   ggtitle("Rising number of amicus briefs over time") +
  theme(plot.title = element_text(size =18, face = "bold", hjust=.5), 
        axis.text.x = element_text(size=14),
        axis.text.y = element_text(size=14),
        axis.title.x = element_text(size=14),
        axis.title.y = element_text(size=16))+
  scale_x_continuous(breaks = round(seq(min(ami.data$year), max(ami.data$year), by = 10),1)) 

pdf("Figure_A2.pdf", height=6, width=8)
p
dev.off()

##########################################################################################
#Post-regression analysis
##########################################################################################

#GRAPH SETTINGS
title.size <- 20
nom.label.size <- 10
x.axis.tick.size <- 10
y.axis.tick.size <- 14
x.axis.label.size <- 18
y.axis.label.size <- 18
axis.text <-1.8
title.text <- 2
label.text <- 1.5
y.axis.max <- .6
shading <- 'grey90'
width <- 3

nom.data.for.post <-  read.dta13("nom_data_for_postestimation.dta", convert.underscore = TRUE) 

#SIMS FOR ALL GROUPS MODEL
sims <- read.dta13("negbin_sims1_full_time_model_all_groups_new_rr.dta", convert.underscore = TRUE)
sims <- sims[1:1000,]
#clean up names
colnames(sims) 
colnames(sims) <- gsub("foo.dv.total.","",colnames(sims))
colnames(sims) 

#set up sims for each year
seq.l <- min(nom.data.for.post$time):max(nom.data.for.post$time)
n.sims <- nrow(sims) #1,000 but can vary for computational speed
lo.qual <- quantile(nom.data.for.post$quality, .25)
hi.qual <- quantile(nom.data.for.post$quality, .75)
lo.extr <- quantile(nom.data.for.post$extremity, .25)
hi.extr <- quantile(nom.data.for.post$extremity, .75)
lo.qual.matrix <- matrix(NA, nrow=n.sims, ncol=length(seq.l))
hi.qual.matrix <- matrix(NA, nrow=n.sims, ncol=length(seq.l))
lo.extr.matrix <- matrix(NA, nrow=n.sims, ncol=length(seq.l))
hi.extr.matrix <- matrix(NA, nrow=n.sims, ncol=length(seq.l))

#get preds using exp(XB) (from functional form of negative binomial)
#what we want to do:
#for every year (time), calculate predicted  number of groups mobilizing, vary qual and extremity (but only one at a time)
#create mean vars for sims
amicus.temp = mean(nom.data.for.post$log.amicus)
extremity.temp = mean(nom.data.for.post$extremity)
quality.temp = mean(nom.data.for.post$quality)
total.groups.lag1.temp = mean(nom.data.for.post$total.groups.lag1, na.rm=TRUE)
time.diff1.temp = mean(nom.data.for.post$time.diff1,na.rm=TRUE)
total.lag.inter.temp = mean(nom.data.for.post$total.lag.inter, na.rm=TRUE)

for (i in 1:length(seq.l)){
  time.temp <- seq.l[i]
  #QUALITY
  #lo qual
  lo.qual.matrix[,i] <- exp(
    sims$b.cons + 
      sims$b.total.groups.lag1*total.groups.lag1.temp  +
      sims$b.time.diff1 *  time.diff1.temp + 
      sims$b.quality*lo.qual    + 
      sims$b.time*time.temp +
      sims$b.extremity*extremity.temp +
      sims$b.log.amicus*amicus.temp +
      sims$b.total.lag.inter*total.lag.inter.temp +
      sims$b.quality.time*lo.qual*time.temp  +
      sims$b.extremity.time*extremity.temp*time.temp
  )
  #HIGH QUALITY
  hi.qual.matrix[,i] <- exp(
    sims$b.cons + 
      sims$b.total.groups.lag1*total.groups.lag1.temp  +
      sims$b.time.diff1 *  time.diff1.temp + 
      sims$b.quality*hi.qual    + 
      sims$b.time*time.temp +
      sims$b.extremity*extremity.temp +
      sims$b.log.amicus*amicus.temp +
      sims$b.total.lag.inter*total.lag.inter.temp +
      sims$b.quality.time*hi.qual*time.temp  +
      sims$b.extremity.time*extremity.temp*time.temp)
  #EXTREMITY
  #LOW extremity
  lo.extr.matrix[,i] <- exp(
    sims$b.cons + 
      sims$b.total.groups.lag1*total.groups.lag1.temp  +
      sims$b.time.diff1 *  time.diff1.temp + 
      sims$b.quality*quality.temp    + 
      sims$b.time*time.temp +
      sims$b.extremity*lo.extr +
      sims$b.log.amicus*amicus.temp +
      sims$b.total.lag.inter*total.lag.inter.temp +
      sims$b.quality.time*quality.temp*time.temp  +
      sims$b.extremity.time*lo.extr*time.temp)
  #High extremity
  hi.extr.matrix[,i] <- exp(
    sims$b.cons + 
      sims$b.total.groups.lag1*total.groups.lag1.temp  +
      sims$b.time.diff1 *  time.diff1.temp + 
      sims$b.quality*quality.temp    + 
      sims$b.time*time.temp +
      sims$b.extremity*hi.extr +
      sims$b.log.amicus*amicus.temp +
      sims$b.total.lag.inter*total.lag.inter.temp+
      sims$b.quality.time*quality.temp*time.temp  +
      sims$b.extremity.time*hi.extr*time.temp)
}

#get means and medians
lo.qual.median <-apply(lo.qual.matrix,2,function(x) quantile(x, probs=c(.5)))
lo.qual.mean  <- colMeans(	lo.qual.matrix)
hi.qual.median <-apply(hi.qual.matrix,2,function(x) quantile(x, probs=c(.5)))
hi.qual.mean  <- colMeans(	hi.qual.matrix)
lo.extr.median <-apply(lo.extr.matrix,2,function(x) quantile(x, probs=c(.5)))
lo.extr.mean  <- colMeans(	lo.extr.matrix)
hi.extr.median <-apply(hi.extr.matrix,2,function(x) quantile(x, probs=c(.5)))
hi.extr.mean  <- colMeans(	hi.extr.matrix)

#now take differences
qual.diff.matrix <- lo.qual.matrix - hi.qual.matrix
qual.diff.stats <- apply(qual.diff.matrix,2,function(x) quantile(x, probs=c(.05,.5,.95)))
extr.diff.matrix <- hi.extr.matrix-lo.extr.matrix
extr.diff.stats <- apply(extr.diff.matrix,2,function(x) quantile(x, probs=c(.05,.5,.95)))

#what is probabilty high greater than low
year <- 1930:2017
x <- colMeans(hi.extr.matrix>lo.extr.matrix)
#what is minimnum probability after 1980?
foo <- seq.l[year==1980] 
x <- x[foo:length(x)]
min(x)

#for text: in 1950 what was difference in quality between hi and lo
foo <-  seq.l[year==1950] 
mean(lo.qual.mean[foo])
mean(hi.qual.mean[foo])
#extrwemity in 2000
foo <-  seq.l[year==2005] 
mean(lo.extr.mean[foo])
mean(hi.extr.mean[foo])

pdf("Figure_8.pdf", width=12, height=10)

par(mar=c(2,5,1,1), mfrow=c(2,2))
#LEVELS
plot(year, lo.qual.mean, xlim=c(1930,2017),  ylim =c(0,60), ylab="", xlab="", type="n", main="",  axes=FALSE)
axis(1, at=seq(1930,2010, 10), labels=T, mgp=c(2,.7,0), cex.axis=axis.text)
axis(2, mgp=c(2,.5,0), cex.axis=axis.text, las=1)
points(year, lo.qual.mean, pch=19, type="l", lwd=width)
points(year, hi.qual.mean , lty=2, pch=19, type="l", lwd=width)
mtext("Predicted number of groups", 2,srt=90, cex=label.text, line=3, font=2)
mtext("A) Quality levels", 3, cex=title.text, line=-2, font=)
text(1960, 12, "Low quality", cex=label.text)
text(1960, 1, "High quality", cex=label.text)
box()
#now do extremity
plot(year, lo.extr.mean, xlim=c(1930,2017),  ylim =c(0,60), ylab="", xlab="", type="n", main="",  axes=FALSE)
axis(1, at=seq(1930,2010, 10), labels=T, mgp=c(2,.7,0), cex.axis=axis.text)
axis(2, mgp=c(2,.5,0), cex.axis=axis.text, las=1)
points(year, lo.extr.mean , pch=19, type="l", lwd=width ,lty=2)
points(year, hi.extr.mean , pch=19, type="l", lwd=width)
mtext("Predicted number of groups", 2,srt=90, cex=label.text, line=3, font=2)
mtext("B) Extremity levels", 3, cex=title.text, line=-2, font=)
text(1978, 21, "High extremity", cex=label.text)
text(1990, 4, "Low extremity", cex=label.text)
box()

#now do differences
plot(year, lo.qual.mean, xlim=c(1930,2017),  ylim =c(-5,40), ylab="", xlab="", type="n", main="", axes=FALSE)
axis(1, at=seq(1930,2010, 10), labels=T, mgp=c(2,.7,0), cex.axis=axis.text)
axis(2,  mgp=c(2,.5,0), cex.axis=axis.text, las=1)
polygon(c(rev(year), year), c(rev(qual.diff.stats [3,]),qual.diff.stats [1,]), col = shading, border = NA)
points(year, qual.diff.stats[2,] ,pch=19, type="l", lwd=width)
abline(h=0, lwd=2,lty=2)
mtext("Difference in predicted number of groups", 2,srt=90, cex=label.text-.2, line=3.5, font=2)
mtext("C) Quality differences", 3, cex=title.text, line=-2, font=)
box()

#now do differences
plot(year, lo.extr.mean, xlim=c(1930,2017),  ylim =c(-5,40), ylab="", xlab="", type="n", main="",  axes=FALSE)
axis(1, at=seq(1930,2010, 10), labels=T, mgp=c(2,.7,0), cex.axis=axis.text)
axis(2,  mgp=c(2,.5,0), cex.axis=axis.text, las=1)
polygon(c(rev(year), year), c(rev(extr.diff.stats [3,]),extr.diff.stats [1,]), col = shading, border = NA)
points(year, extr.diff.stats[2,] ,  pch=19, type="l", lwd=width)
mtext("Difference in predicted number of groups", 2,srt=90, cex=label.text-.2, line=3.5, font=2)
mtext("D) Extremity differences", 3, cex=title.text, line=-2, font=)
box()
abline(h=0, lwd=2,lty=2)

dev.off()

##################################################################################################
##################################################################################################
##################################################################################################
#REPLICATE USING NEGATIVE GROUPS MODEL
##################################################################################################
##################################################################################################
##################################################################################################

sims <- read.dta13("negbin_sims1_full_time_model_neg_groups_new_rr.dta", convert.underscore = TRUE)
#clean up names
#clean up names
colnames(sims) 
colnames(sims) <- gsub("foo.dv.neg.","",colnames(sims))
colnames(sims) 

#set up sims for each year
seq.l <- min(nom.data.for.post$time):max(nom.data.for.post$time)
n.sims <- nrow(sims) #1,000 but can vary for computational speed
lo.qual <- quantile(nom.data.for.post$quality, .25)
hi.qual <- quantile(nom.data.for.post$quality, .75)
lo.extr <- quantile(nom.data.for.post$extremity, .25)
hi.extr <- quantile(nom.data.for.post$extremity, .75)
lo.qual.matrix <- matrix(NA, nrow=n.sims, ncol=length(seq.l))
hi.qual.matrix <- matrix(NA, nrow=n.sims, ncol=length(seq.l))
lo.extr.matrix <- matrix(NA, nrow=n.sims, ncol=length(seq.l))
hi.extr.matrix <- matrix(NA, nrow=n.sims, ncol=length(seq.l))

#get preds using exp(XB) (from functional form of negative binomial)
#what we want to do:
#for every year (time), calculate predicted  number of groups mobilizing, vary qual and extremity (but only one at a time)

# nbreg foo_dv neg_groups_lag1 time_diff1 quality time  extremity log_amicus ///
#   lag_time_inter  quality_time extremity_time

#create mean vars for sims
amicus.temp = mean(nom.data.for.post$log.amicus)
extremity.temp = mean(nom.data.for.post$extremity)
quality.temp = mean(nom.data.for.post$quality)
neg.groups.lag1.temp = mean(nom.data.for.post$neg.groups.lag1, na.rm=TRUE)
time.diff1.temp = mean(nom.data.for.post$time.diff1,na.rm=TRUE)
neg.lag.inter.temp = mean(nom.data.for.post$neg.lag.inter, na.rm=TRUE)

for (i in 1:length(seq.l)){
  time.temp <- seq.l[i]
  #QUALITY
  #lo qual
  lo.qual.matrix[,i] <- exp(
    sims$b.cons + 
      sims$b.neg.groups.lag1*neg.groups.lag1.temp  +
      sims$b.time.diff1 *  time.diff1.temp + 
      sims$b.quality*lo.qual    + 
      sims$b.time*time.temp +
      sims$b.extremity*extremity.temp +
      sims$b.log.amicus*amicus.temp +
      sims$b.neg.lag.inter*neg.lag.inter.temp +
      sims$b.quality.time*lo.qual*time.temp  +
      sims$b.extremity.time*extremity.temp*time.temp
  )
  #HIGH QUALITY
  hi.qual.matrix[,i] <- exp(
    sims$b.cons + 
      sims$b.neg.groups.lag1*neg.groups.lag1.temp  +
      sims$b.time.diff1 *  time.diff1.temp + 
      sims$b.quality*hi.qual    + 
      sims$b.time*time.temp +
      sims$b.extremity*extremity.temp +
      sims$b.log.amicus*amicus.temp +
      sims$b.neg.lag.inter*neg.lag.inter.temp +
      sims$b.quality.time*hi.qual*time.temp  +
      sims$b.extremity.time*extremity.temp*time.temp)
  #EXTREMITY
  #LOW extremity
  lo.extr.matrix[,i] <- exp(
    sims$b.cons + 
      sims$b.neg.groups.lag1*neg.groups.lag1.temp  +
      sims$b.time.diff1 *  time.diff1.temp + 
      sims$b.quality*quality.temp    + 
      sims$b.time*time.temp +
      sims$b.extremity*lo.extr +
      sims$b.log.amicus*amicus.temp +
      sims$b.neg.lag.inter*neg.lag.inter.temp +
      sims$b.quality.time*quality.temp*time.temp  +
      sims$b.extremity.time*lo.extr*time.temp)
  #High extremity
  hi.extr.matrix[,i] <- exp(
    sims$b.cons + 
      sims$b.neg.groups.lag1*neg.groups.lag1.temp  +
      sims$b.time.diff1 *  time.diff1.temp + 
      sims$b.quality*quality.temp    + 
      sims$b.time*time.temp +
      sims$b.extremity*hi.extr +
      sims$b.log.amicus*amicus.temp +
      sims$b.neg.lag.inter*neg.lag.inter.temp+
      sims$b.quality.time*quality.temp*time.temp  +
      sims$b.extremity.time*hi.extr*time.temp)
}

#get means and medians
lo.qual.median <-apply(lo.qual.matrix,2,function(x) quantile(x, probs=c(.5)))
lo.qual.mean  <- colMeans(	lo.qual.matrix)
hi.qual.median <-apply(hi.qual.matrix,2,function(x) quantile(x, probs=c(.5)))
hi.qual.mean  <- colMeans(	hi.qual.matrix)
lo.extr.median <-apply(lo.extr.matrix,2,function(x) quantile(x, probs=c(.5)))
lo.extr.mean  <- colMeans(	lo.extr.matrix)
hi.extr.median <-apply(hi.extr.matrix,2,function(x) quantile(x, probs=c(.5)))
hi.extr.mean  <- colMeans(	hi.extr.matrix)

#now take differences
qual.diff.matrix <- lo.qual.matrix - hi.qual.matrix
qual.diff.stats <- apply(qual.diff.matrix,2,function(x) quantile(x, probs=c(.05,.5,.95)))
extr.diff.matrix <- hi.extr.matrix-lo.extr.matrix
extr.diff.stats <- apply(extr.diff.matrix,2,function(x) quantile(x, probs=c(.05,.5,.95)))

#what is probabilty high greater than low
year <- 1930:2017
x <- colMeans(hi.extr.matrix>lo.extr.matrix)
#what is minimnum probability after 1980?
foo <- seq.l[year==1980] 
x <- x[foo:length(x)]
min(x)

pdf("Figure_A5.pdf", width=12, height=10)

par(mar=c(2,5,1,1), mfrow=c(2,2))

plot(year, lo.qual.mean, xlim=c(1930,2017),  ylim =c(0,40), ylab="", xlab="", type="n", main="",  axes=FALSE)
axis(1, at=seq(1930,2010, 10), labels=T, mgp=c(2,.7,0), cex.axis=axis.text)
axis(2, mgp=c(2,.5,0), cex.axis=axis.text, las=1)
points(year, lo.qual.mean, pch=19, type="l", lwd=width)
points(year, hi.qual.mean , lty=2, pch=19, type="l", lwd=width)
mtext("Predicted number of groups", 2,srt=90, cex=label.text, line=3, font=2)
mtext("A) Quality levels", 3, cex=title.text, line=-2, font=)
text(1960, 7, "Low quality", cex=label.text)
text(1960, 0, "High quality", cex=label.text)
box()

#now do extremity
plot(year, lo.extr.mean, xlim=c(1930,2017),  ylim =c(0,40), ylab="", xlab="", type="n", main="",  axes=FALSE)
axis(1, at=seq(1930,2010, 10), labels=T, mgp=c(2,.7,0), cex.axis=axis.text)
axis(2, mgp=c(2,.5,0), cex.axis=axis.text, las=1)
points(year, lo.extr.mean , pch=19, type="l", lwd=width ,lty=2)
points(year, hi.extr.mean , pch=19, type="l", lwd=width)
mtext("Predicted number of groups", 2,srt=90, cex=label.text, line=3, font=2)
mtext("B) Extremity levels", 3, cex=title.text, line=-2, font=)
text(1998, 21, "High extremity", cex=label.text)
text(1990, 2, "Low extremity", cex=label.text)
box()

#now do differences
plot(year, lo.qual.mean, xlim=c(1930,2017),  ylim =c(-5,40), ylab="", xlab="", type="n", main="", axes=FALSE)
axis(1, at=seq(1930,2010, 10), labels=T, mgp=c(2,.7,0), cex.axis=axis.text)
axis(2,  mgp=c(2,.5,0), cex.axis=axis.text, las=1)
polygon(c(rev(year), year), c(rev(qual.diff.stats [3,]),qual.diff.stats [1,]), col = shading, border = NA)
points(year, qual.diff.stats[2,] ,pch=19, type="l", lwd=width)
abline(h=0, lwd=2,lty=2)
mtext("Difference in predicted number of groups", 2,srt=90, cex=label.text-.2, line=3.5, font=2)
mtext("C) Quality differences", 3, cex=title.text, line=-2, font=)
box()

#now do differences
plot(year, lo.extr.mean, xlim=c(1930,2017),  ylim =c(-5,40), ylab="", xlab="", type="n", main="",  axes=FALSE)
axis(1, at=seq(1930,2010, 10), labels=T, mgp=c(2,.7,0), cex.axis=axis.text)
axis(2,  mgp=c(2,.5,0), cex.axis=axis.text, las=1)
polygon(c(rev(year), year), c(rev(extr.diff.stats [3,]),extr.diff.stats [1,]), col = shading, border = NA)
points(year, extr.diff.stats[2,] ,  pch=19, type="l", lwd=width)
mtext("Difference in predicted number of groups", 2,srt=90, cex=label.text-.2, line=3.5, font=2)
mtext("D) Extremity differences", 3, cex=title.text, line=-2, font=)
box()
abline(h=0, lwd=2,lty=2)

dev.off()
